//TABLES AND FIGURES
//HAUSMAN AND MUEHLENBACHS
//"Price Regulation and Environmental Externalities: Evidence from Methane Leaks"

//////////////////////////////////////////////////////////////////////////////////////////////	
//SUMMARY STATS TABLE
/////////////////////////////////////////////////////////////////////////////////////////////
use $SNL/ldc_data12, clear

eststo: estpost sum vol_lauf_bcf  lauf_per_purchases purchases_bcf Total_End_User_tot_customers MainsLengthofPipe   ///
	MainsSteelUnprotBare MainsSteelUnprotCoated MainsSteelCathodiProtBare MainsSteelCathodiProtCoated ///
	MainsPlastic MainsCastIron MainsOther pmainsrank1 mains_age_avg ///
	price_city_real   
eststo: estpost sum vol_lauf_bcf lauf_per_purchases purchases_bcf Total_End_User_tot_customers MainsLengthofPipe  ///
	MainsSteelUnprotBare MainsSteelUnprotCoated MainsSteelCathodiProtBare MainsSteelCathodiProtCoated ///
	MainsPlastic MainsCastIron MainsOther pmainsrank1 mains_age_avg  ///	
	dist_om_real capital_real ///
	price_city_real   ///
	if (dist_om_real!=. | capital_real!=.)  


//////////////////////////////////////////////////////////////////////////////////////////////	
//LAUF MEASURE IS NOISY HISTOGRAM
/////////////////////////////////////////////////////////////////////////////////////////////
use $SNL/ldc_data12, clear
tw (hist lauf_per_purchases_trim1, color(gs10) fcolor(gs10) start(-35) width(1.5)) ///
	(hist lauf_per_purchases_trim1 if dist_om_real!=., color(green) fcolor(none) start(-35) width(1.5)) ///
	(hist lauf_per_purchases_trim1 if dist_om_real!=. [fweight=sales], color(cranberry) fcolor(none) start(-35) width(1.5) ///
	graphregion(color(white)) legend(order(1 "Full sample" 2 "Financial reporters" 3 "Financial reporters, weighted")) )

	
//////////////////////////////////////////////////////////////////////////////////////////////	
//FIGURES AND TABLES: LAUF IS NOT JUST NOISE
//////////////////////////////////////////////////////////////////////////////////////////////	
use $SNL/ldc_data12, clear
keep if Year>=2004
gen q=.
forval v=1(1)100{
	replace q=`v' if pmainsbad>=`v'-1 & pmainsbad<=`v'
}
replace q=. if pmainsbad==.
collapse (mean) lauf_per_purchases (count) count=lauf_per_purchases (sd) sd=lauf_per_purchases, by(q)	
replace count=. if  q==.
label variable count count
twoway (scatter lauf_per_purchases q, yscale(range(-8(5)12)) ylabel(-5(5)10) mcolor(gs8) ytitle("Percent of gas that is lost") ///
	graphregion(color(white)) xtitle("Percent of mains that are low-quality, binned") )	///
	(lfit lauf_per_purchases q, lcolor(black) lwidth(medthick) legend(off)) 
	
use $SNL/ldc_data12, clear
keep if Year>=2004
gen q=.
forval v=-1(1)100{
	replace q=`v' if mains_age_avg>=`v'-1 & mains_age_avg<=`v'
}
replace q=. if mains_age_avg==.
collapse (mean) lauf_per_purchases (count) count=lauf_per_purchases (sd) sd=lauf_per_purchases, by(q)	
replace count=. if  q==.
label variable count count
twoway (scatter lauf_per_purchases q, yscale(range(-8(5)12)) ylabel(-5(5)10) mcolor(gs8) ytitle("Percent of gas that is lost") ///
	graphregion(color(white)) xtitle("Average age of pipeline mains (years), binned") )	///
	(lfit lauf_per_purchases q, lcolor(black) lwidth(medthick) legend(off)) 
	

//APPENDIX TABLE: LAUF IS NOT JUST NOISE	
use $SNL/ldc_data12, clear
	gen pMainsOtherExpanded=pMainsDuct+pMainsCop+pMainsOther1
	replace pMainsOtherExpanded=pMainsOther+pMainsOther2 if pMainsOther2!=.
	
areg lauf_per_purchases mains_age_avg pmainsgood RY* , absorb(state_abbr) cluster(id)
areg lauf_per_purchases_trim1 mains_age_avg pmainsgood RY*  , absorb(state_abbr) cluster(id)
areg lauf_per_purchases mains_age_avg pmainsgood RY* if sample_bundled==1, absorb(state_abbr) cluster(id)
areg lauf_per_purchases mains_age_avg pMainsSteelUnprotCoated pMainsSteelCathodiProtBare ///
	 pMainsSteelCathodiProtCoated pMainsPlastic pMainsCastIron pMainsOtherExpanded ///
	RY* , absorb(state_abbr) cluster(id)
areg lauf_per_purchases mains_age_avg pmainsgood RY*  , absorb(id) cluster(id)


////////////////////////////////////////////////
////////// FIGURES: EVENT STUDIES //////////////
////////////////////////////////////////////////
use $SNL/ldc_data12, clear	
gen tempcapital_real=capital_real*1000
gen tempdist_om_real=dist_om_real*1000

keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")	

bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004
  
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales ///
	vol_lauf_trim1{
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	
  
gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	

xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) first
	 
keep if e(sample)	
		
sum historic_lauf_s, detail
gen treat50=(historic_lauf_s>=r(p50))

*** yearly dummies
	tab Year, g(D_yr_)
	
	foreach num of numlist 1(1)10 {
	local z=`num'+2003
	rename D_yr_`num' D_yr_`z'
	g TD50_yr_`z'=D_yr_`z'*treat50
	}
	
gen om=tempdist_om_real_s
gen lauf=vol_lauf_s

foreach v in om lauf {
	reg `v'  ///
	TD50_yr_2004-TD50_yr_2008 TD50_yr_2010-TD50_yr_2013 ///
	D_yr_2004-D_yr_2008 D_yr_2010-D_yr_2013 ///
	i.id, r cluster(id)
	foreach num of numlist 2004(1)2008  2010(1)2013 {
		scalar define b50`num'`v'=_b[TD50_yr_`num']
		scalar define s50`num'`v'=_se[TD50_yr_`num']
	}	
}

*** generate figures ******************
clear 
set obs 10
g year=2009 in 6
foreach q in  50 {
foreach variable in om lauf{
	g `variable'`q'_b=0 in 6
	g `variable'`q'_l=. in 6
	g `variable'`q'_u=. in 6
}
}

foreach num of numlist 1(1)5 7(1)10{
	local z=`num'+2003
	replace year = `z' in `num'
	
	foreach q in  50 {
	foreach variable in om lauf{	
		replace `variable'`q'_b=b`q'`z'`variable' in `num'
		replace `variable'`q'_l=b`q'`z'`variable'-1.96*s`q'`z'`variable' in `num'
		replace `variable'`q'_u=b`q'`z'`variable'+1.96*s`q'`z'`variable' in `num'
	}
	}
}

tw	(connected lauf50_b year, ms(circle) lp(l) mc(black) lc(black)) ///
	(line lauf50_l year, lp(dash) lc(black) lwidth(vthin) cmissing(n)) ///
	(line lauf50_u year, lp(dash) lc(black) lwidth(vthin) cmissing(n))	, ///
	legend(off) ///
	scheme(s1mono) graphregion(fcolor(white)) ///
	xline(2009.5) ytitle("Leak volume")  ///
	xlabel(2004(1)2013) ///
	ylabel(,grid gmin gmax)	
		

				
////////////////////////////////////////////////
//////////LDAR ABATEMENT COSTS /////////////////
////////////////////////////////////////////////
use $SNL/ldc_data12, clear

gen tempcapital_real=capital_real*1000
gen tempdist_om_real=dist_om_real*1000

sum price_city_real if tempdist_om_real!=. & vol_lauf!=. [aweight=sales]
global mb1=6.77 //financial reporters in our sample, weighted (note unweighted gives slightly higher)
sum price_city_real if tempdist_om_real!=. & vol_lauf!=. & Year>=2010 [aweight=sales]
global mb2=5.67 //financial reporters in our sample, weighted, post 2010
	
keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")	
	//need to do these here, or else include an "if" line when scaling/winsorizing

forval r=1(1)4{
	gen timer_`r'=(Year-2004) if region==`r'
		replace timer_`r'=0 if region!=`r'
	forval v=1(1)3{
		gen timer`v'_`r'=timer_`r'^`v'
	}
}
 
bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004
 
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales ///
	vol_lauf_trim1{
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	

////////////////////////////
//MAIN TABLE, with first stage in text
////////////////////////////
//OLS
areg tempdist_om_real_s vol_lauf_s ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	 if Year>=2004 , absorb(id) cluster(id)
lincom _b[vol_lauf_s]-(-$mb2)

//IV 	
//First stage
areg vol_lauf_s lauf_phmsa_s  ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	 if Year>=2004 & tempdist_om_real_s!=., absorb(id) cluster(id) 
//Second stage	
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	 if Year>=2004 , fe i(id) cluster(id) partial(RY*) first ///
	ffirst savefirst savefprefix(first1)  
lincom _b[vol_lauf_s]-(-$mb2)
	

//////////////////////////////////////////////////////////////////////////////////////////////	
	//APPENDIX: OTHER FORMS OF THE IV
/////////////////////////////////////////////////////////////////////////////////////////////

//laufp not lauf
gen temp1=lauf_per_purchases if Year>=2004 & Year<=2009
	bysort id: egen historic_laufp=mean(temp1)
	drop temp1 
	gen laufp_phmsa=historic_laufp*phmsa
xtivreg2 tempdist_om_real_s (vol_lauf_s = laufp_phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	 if Year>=2004 , fe i(id) cluster(id) partial(RY*) first  
	lincom _b[vol_lauf_s]-(-$mb2)

//alternative iv construction dates:
//first using only 2004 leak rate
gen temp1=vol_lauf_s if Year==2004   
	bysort id: egen historic_lauf1=mean(temp1)
	drop temp1 
	gen lauf1_phmsa=historic_lauf1*phmsa	 
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf1_phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004, fe i(id) cluster(id) partial(RY*) first  
	lincom _b[vol_lauf_s]-(-$mb2)

// uses data for 1995-2013, creates IV using data 1995-2009 
//need to recreate the data so that can export table using the same variable 
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales {
	drop `v'_s 
}
bysort id: egen scale2=mean(MainsLengthofPipe)	
// Using all years
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales {
	gen `v'_s=`v'/scale2 
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	
gen temp1=vol_lauf_s if Year>=1995 & Year<=2009 
	bysort id: egen historic_lauf2=mean(temp1)
	drop temp1 
	gen lauf2_phmsa=historic_lauf2*phmsa	
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf2_phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	, fe i(id) cluster(id) partial(RY*) first 
	lincom _b[vol_lauf_s]-(-$mb2)
//recreate data using only data post-2004 for regressions that follow 
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales {
	drop `v'_s 
}
drop scale2
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales {
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

 
//just first-difference source of variation. cubic time trends by region.
xtivreg2 tempdist_om_real_s (vol_lauf_s = phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s timer*   ///
	if Year>=2004 , fe i(id) cluster(id)  first 
	lincom _b[vol_lauf_s]-(-$mb2) 

//two sources of variation	 
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s timer*    ///
	if Year>=2004 , fe i(id) cluster(id)  first  
	lincom _b[vol_lauf_s]-(-$mb2)

/////////////////////	
//APPENDIX: INVERSE
/////////////////////
//OLS
areg vol_lauf_s tempdist_om_real_s  ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004, absorb(id) cluster(id)  	
	nlcom 1/_b[tempdist_om_real_s]
	nlcom 1/_b[tempdist_om_real_s]-(-$mb2)

//IV
xtivreg2 vol_lauf_s (tempdist_om_real_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004  , fe i(id) cluster(id) partial(RY*) first 
	nlcom 1/_b[tempdist_om_real_s]
	nlcom 1/_b[tempdist_om_real_s]-(-$mb2)
	
 	

/////////////////////	
//APPENDIX: OTHER CONTROLS	AND SAMPLES
/////////////////////	
//year effects
xtivreg2 tempdist_om_real_s (vol_lauf_s =  lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s YY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(YY*) 
	lincom _b[vol_lauf_s]-(-$mb2)
	 
//regional cubic time trends
xtivreg2 tempdist_om_real_s (vol_lauf_s =  lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s timer* phmsa  ///
	if Year>=2004 , fe i(id) cluster(id) 
	lincom _b[vol_lauf_s]-(-$mb2)	 
	 
//fewer controls
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	RY*  ///
	if Year>=2004   , fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)
	
//cubic supply, and price
quietly sum sales_s if vol_lauf!=. & tempdist_om_real!=.
forval v=1(1)3{
	gen sales_s`v'=((sales_s-r(mean))/r(sd))^`v'
}
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s* price_resi_real RY*  ///
	if Year>=2004   , fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)
	drop sales_s1 sales_s2 sales_s3 
	
// parallel trends concern: controlling for linear trends interacted with IV	
gen leaks_year_s=historic_lauf_s*Year
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	leaks_year_s if Year>=2004  , fe i(id) cluster(id) partial(RY*) first 
	lincom _b[vol_lauf_s]-(-$mb2)
	 
// parallel trends concern: controlling for linear trends by utility		
gen idtemp=id if vol_lauf_s!=. & dist_om_real!=.
gen trend=Year-2004
xi i.idtemp*trend, prefix(TT)
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY* TT*   ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY* TT*) first 
	lincom _b[vol_lauf_s]-(-$mb2)
	 
//controlling for citygate
xtivreg2 tempdist_om_real_s (vol_lauf_s =  lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s price_city_real RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)
	  
//balanced residential bundled only
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004 & sample_bundled==1, fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)

//no CA	
xtivreg2 tempdist_om_real_s (vol_lauf_s =lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004 & state_abbr!="CA", fe i(id) cluster(id) partial(RY*) 	
	lincom _b[vol_lauf_s]-(-$mb2)
	 
//no east coast	
xtivreg2 tempdist_om_real_s (vol_lauf_s =lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004 & region!=1, fe i(id) cluster(id) partial(RY*) 	
	lincom _b[vol_lauf_s]-(-$mb2)

xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) first 
	sum historic_lauf_s if e(sample), detail
	gen q=1 if historic_lauf_s<r(p25)
	replace q=2 if historic_lauf_s>=r(p25) & historic_lauf_s<r(p50)
	replace q=3 if historic_lauf_s>=r(p50) & historic_lauf_s<r(p75)
	replace q=4 if historic_lauf_s>=r(p75)
 
//exclduing the highest quartile of historical leak volume
xtivreg2 tempdist_om_real_s (vol_lauf_s =lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004 & q!=4, fe i(id) cluster(id) partial(RY*) 	
	lincom _b[vol_lauf_s]-(-$mb2) 
	 
//sample has overlap
gen sampleoverlap=1
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) first 
	gen temp=e(sample)
foreach v in MainsLengthofPipe MainsLengthofPipe_s sales_s SerTotalNumberof_s servsbad_s mainsbad_s {
	sum `v' if q<=2 & temp==1
	replace sampleoverlap=0 if `v'>r(max)
	replace sampleoverlap=0 if `v'<r(min)
}
drop temp
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 & sampleoverlap==1, fe i(id) cluster(id) partial(RY*) first
	lincom _b[vol_lauf_s]-(-$mb2)	 
	
	
//weighting (weight by fraction of incoming volume from citygate receipts)
//(i.e., not engaged in much storage or transmission on the incoming end)	 
gen tempa=ReceiptsatCitygate2013/ ReceiptsAll2013 if Year>=2004
bysort id: egen temp2a=max(tempa)
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY* ///
	[aweight=temp2a] if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	estadd scalar fstat=e(widstat)
	lincom _b[vol_lauf_s]-(-$mb2)
	 
//different definition of lauf: trimiming at 1 percent	 
gen temp1=vol_lauf_trim1_s if Year>=2004 & Year<=2009
	bysort id: egen althistoric_lauf=mean(temp1)
	drop temp1 
	gen altlauf_phmsa=althistoric_lauf*phmsa	
gen temp=vol_lauf_s /*for table creation*/ 
replace vol_lauf_s=vol_lauf_trim1_s /*for table creation*/ 
xtivreg2 tempdist_om_real_s (vol_lauf_s = altlauf_phmsa) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004  , fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)	
replace vol_lauf_s=temp
drop temp
drop althistoric_lauf altlauf_phmsa	

//dropping instead of winsorizing
foreach v in tempdist_om_real vol_lauf mainsbad MainsLengthofPipe SerTotalNumberof servsbad sales {
	gen alt`v'=`v'/scale  if Year>=2004
	quietly sum alt`v', detail
	replace alt`v'=. if alt`v'>r(p99) & alt`v'!=.
	replace alt`v'=. if alt`v'<r(p1) & alt`v'!=.
}
gen temp1=altvol_lauf if Year>=2004 & Year<=2009
	bysort id: egen althistoric_lauf=mean(temp1)
	drop temp1 
	gen altlauf_phmsa=althistoric_lauf*phmsa	
gen temp=vol_lauf_s /*for table creation*/ 
replace vol_lauf_s=altvol_lauf /*for table creation*/ 
xtivreg2 alttempdist_om_real (vol_lauf_s  = altlauf_phmsa) ///
	altMainsLengthofPipe altmainsbad altSerTotalNumberof altservsbad altsales RY* ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	lincom _b[vol_lauf_s]-(-$mb2)

drop alt*	
replace vol_lauf_s = temp
drop temp

//no winsorizing or dropping 
foreach v in tempdist_om_real vol_lauf mainsbad MainsLengthofPipe SerTotalNumberof servsbad sales {
	gen alt`v'=`v'/scale  if Year>=2004
}
gen temp1=altvol_lauf if Year>=2004 & Year<=2009
	bysort id: egen althistoric_lauf=mean(temp1)
	drop temp1 
	gen altlauf_phmsa=althistoric_lauf*phmsa	
gen temp=vol_lauf_s /*for table creation*/ 
replace vol_lauf_s=altvol_lauf /*for table creation*/ 
xtivreg2 alttempdist_om_real (vol_lauf_s  = altlauf_phmsa) ///
	altMainsLengthofPipe altmainsbad altSerTotalNumberof altservsbad altsales RY* ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	lincom _b[vol_lauf_s]-(-$mb2)

drop alt*	
replace vol_lauf_s = temp
drop temp

//no scaling 
gen temp=vol_lauf_s /*for table creation*/ 
replace vol_lauf_s=vol_lauf /*for table creation*/ 
xtivreg2 tempdist_om_real (vol_lauf_s  = lauf_phmsa) ///
	MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales RY* ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	lincom _b[vol_lauf_s]-(-$mb2)
replace vol_lauf_s = temp
drop temp

// outcome variable including capital  
rename Total_Distribution_O_M_Expense Distribution
rename ExpenseTotal_Trans_O_M Transmission
rename ExpenseTotal_Storage Storage 
rename Total_Cust_Accounts_Expense Accounts
rename Total_CSI_Expense Information
rename Total_Sales_Expense Sales
rename Total_Administrative_General_O_M Administrative
rename ExpenseTotal_Production Production
rename Distribution_PltGasAdd DCapital
foreach v in Distribution Transmission Storage Accounts Information Sales Administrative ///
	Production DCapital {
	replace `v'=`v'/cpi*$cpi2015*1000 //$, not $000s
}
gen appendix=Distribution+DCapital/10 //assuming capital projects last 10 years

foreach v in appendix {
	gen `v'_s=`v'/scale
	quietly sum `v'_s, detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}

xtivreg2 appendix_s (vol_lauf_s = lauf_phmsa_s) ///
	MainsLengthofPipe_s mainsbad_s SerTotalNumberof_s servsbad_s sales_s RY* ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	lincom _b[vol_lauf_s]-(-$mb2)
 
drop appendix appendix_s


 // outcome variable including transmission, storage etc. expenditures  
gen appendix=Distribution+Trans+Storage+Info+Sales+Admin+DCapital/10 //assuming capital projects last 10 years

foreach v in appendix {
	gen `v'_s=`v'/scale
	quietly sum `v'_s, detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}

xtivreg2 appendix_s (vol_lauf_s = lauf_phmsa_s) ///
	MainsLengthofPipe_s mainsbad_s SerTotalNumberof_s servsbad_s sales_s RY* ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
	lincom _b[vol_lauf_s]-(-$mb2)
	drop appendix appendix_s


//long-run variation
gen period=1 if Year>=2004 & Year<2010
replace period=2 if Year>=2010 & Year<2015
collapse (mean) tempdist_om_real_s  vol_lauf_s lauf_phmsa_s ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s, by(period id region Company)
	xi i.region*i.period, prefix(RY)
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if period!=., fe i(id) cluster(id) partial(RY*) 
	lincom _b[vol_lauf_s]-(-$mb2)


	
///////////////////// 
//APPENDIX: HETEROGENEITY OF LDAR COST BY DENSITY
/////////////////////	
use $SNL/ldc_data12, clear

gen tempcapital_real=capital_real*1000
gen tempdist_om_real=dist_om_real*1000

keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")	
	//need to do these here, or else include an "if" line when scaling/winsorizing

bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004
  
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales ///
	vol_lauf_trim1{
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	

xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) first
	
gen density=NumSerTotal/MainsLengthofPipe
bysort id: egen temp=mean(density) if Year>=2004
	xtile qdensity=temp, nq(4)
	drop temp
	table qdensity, contents(mean density)
	
forval v=1(1)4{
xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 & qdensity==`v', fe i(id) cluster(id) partial(RY*) first	 
	lincom _b[vol_lauf_s]-(-$mb2)
}	

	
/////////////////////	
//APPENDIX: SUBCATEGORIES OF OM. ONE ROW PER DEPENDENT VARIABLE. 
/////////////////////
use $SNL/ldc_data12, clear

keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")	

bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004

foreach v in vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales {
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s, detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	

rename D_Op_Distr_Load_Dispatching D_Op_Distr_LoadDisp
rename D_Op_Comp_Stat_Fuel_Power D_Op_CompStatFuelPower
rename D_Op_Meas_Reg_St_City_Gate D_Op_MeasRegStCityGate
rename D_Op_Meter_House_Regulator D_Op_MeterHouseReg
rename D_Maint_Comp_St_Equipment D_Maint_CompStEquip
rename D_Maint_Meas_Reg_St_General D_Maint_MeasRegStGeneral
rename D_Maint_Meas_Reg_Stat_Ind D_Maint_MeasRegStatInd
rename D_Maint_Meas_Reg_St_City_Gate D_Maint_MeasRegStCityG
rename Total_Distribution_O_M_Expense Total_Dist_OM_Expense

foreach v of varlist Total_Dist_OM_Expense D_*{
	gen r`v'=`v'/cpi*$cpi2015*1000 //$ not $000s
	gen r`v'_s=r`v'/scale if Year>=2004
	quietly sum r`v'_s, detail
	replace r`v'_s=r(p99) if r`v'_s>r(p99) & r`v'_s!=.
	replace r`v'_s=r(p1) if r`v'_s<r(p1) & r`v'_s!=.
}	

foreach v of varlist Total_Dist_OM_Expense D_*{
display "*****`v'"
areg r`v'_s vol_lauf_s ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004, absorb(id) cluster(id) 
xtivreg2 r`v'_s (vol_lauf_s = lauf_phmsa_s)   ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s RY*  ///
	if Year>=2004, fe i(id) cluster(id) first partial(RY*)
}


///////////////////////////////////////		
//CAPITAL AND PIPELINE REPLACEMENT
///////////////////////////////////////	
use $SNL/ldc_data12, clear

gen temp=mainsbad if Year==2004
	bysort id: egen historic_bad=max(temp)
	drop temp

gen temp=mainsbad/MainsLengthofPipe if Year==2004
	bysort id: egen historic_pbad=max(temp)
	drop temp

gen temp=mainsbad if Year==1995
	bysort id: egen historic_bad1995=max(temp)
	drop temp 
	
gen temp=mainsbad if Year==1998
	bysort id: egen historic_bad1998=max(temp)
	drop temp 
	
gen temp1=mainsbad if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=mainsbad if Year==2013
bysort id: egen temp2b=max(temp2)
gen replacement=temp1b-temp2b
drop temp*

gen temp1=capital_real if Year>=2004
bysort id: egen temp2=mean(temp) //averaging not summing in case missing years
gen shortacccapital=temp2*(2013-2004)
drop temp*

gen temp1=total_real if Year>=2004
bysort id: egen temp2=mean(temp)
gen shortacctotal=temp2*(2013-2004)
drop temp*

gen temp1=TotGas_Plt_in_ServiceAdd if Year>=2004
bysort id: egen temp2=mean(temp)
gen shortacccapitalall=temp2*(2013-2004)
drop temp*

gen temp1=sales_bcf if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=sales_bcf if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdsales1=temp2b-temp1b
drop temp*

gen temp1=MainsLengthofPipe if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=MainsLengthofPipe if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdMainsLengthofPipe=temp2b-temp1b
drop temp*

gen temp1=Total_End_User_tot_customers if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=Total_End_User_tot_customers if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdTotal_End_User=temp2b-temp1b
drop temp*

gen temp1=TotPlt_inServ_YrEnd if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=TotPlt_inServ_YrEnd if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdtotalplant=temp2b-temp1b //long difference of accumulated capital
drop temp*

keep if capital_real!=. & sales!=. & mainsbad!=.
drop if strmatch(Company,"Pacific*")

gen temp=MainsLengthofPipe  if Year>=2004
bysort id: egen scale=mean(temp)

foreach v in shortacccapital replacement historic_bad historic_bad1995  shortacctotal shortacccapitalall shortdtotalplant ///
	shortdsales1 shortdMainsLengthofPipe shortdTotal_End_User {
	gen alt1`v'=`v'/scale  
	quietly sum alt1`v' if Year==2013, detail //only trim outliers for the 2013 observations that we actually use
	replace alt1`v'=r(p99) if alt1`v'>r(p99) & alt1`v'!=. & Year==2013
	replace alt1`v'=r(p1) if alt1`v'<r(p1) & alt1`v'!=. & Year==2013
}

//first stage:
ivreg2 alt1shortacccapital (alt1replacement= alt1historic_bad) ///
	  alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User  i.region ///
	if Year==2013 & shortacctotal!=.  , first ffirst savefirst savefprefix(first1) robust //preferred
			 
//OLS
reg alt1shortacccapital alt1replacement  ///
	  alt1shortdsales1   alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=.,  robust
			 
//2sls:
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=. , first robust



//appendix, robustness 
ivreg2 alt1shortacctotal (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=., first robust
ivreg2 alt1shortacccapitalall (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=., first robust
ivreg2 alt1shortdtotalplant (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=. & strmatch(Company,"Pacific*")==0, first robust
/*control for citygate*/ 
	gen temp1=price_city_real if Year==2004
	bysort id: egen temp1b=max(temp1)
	gen temp2=price_city_real if Year==2013
	bysort id: egen temp2b=max(temp2)
	gen shortdprice=temp2b-temp1b
	drop temp*
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User shortdprice i.region ///
	if Year==2013 & shortacctotal!=. , first robust
/*no controls*/ 
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	if Year==2013 & shortacctotal!=., first robust
// Using lag from 1995
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad1995) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=. , first robust
//weight by high citygate portion (ie not transmission)
gen tempa=ReceiptsatCitygate2013/ ReceiptsAll2013 if Year>2004
bysort id: egen temp2a=max(tempa)
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	[aweight=temp2a] if Year==2013 & shortacctotal!=. ///
	, first robust  
	drop temp*
 	
// No scaling 
gen tempreplacement=alt1replacement
replace alt1replacement=replacement /* need to have the coefficient we want printed in the table to have the same name in all specs*/ 
ivreg2 shortacccapital (alt1replacement = historic_bad) ///
	shortdsales1 shortdMainsLengthofPipe shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=., first robust
replace alt1replacement=tempreplacement
drop tempreplacement	


//Using 1998-2013 instead of 2004-2013
gen temp1=mainsbad if Year==1998
bysort id: egen temp1b=max(temp1)
gen temp2=mainsbad if Year==2013
bysort id: egen temp2b=max(temp2)
replace replacement=temp1b-temp2b
drop temp*

gen temp1=capital_real if Year>=1998
bysort id: egen temp2=mean(temp) //averaging not summing in case missing years
replace shortacccapital=temp2*(2013-1998)
drop temp*

gen temp1=sales_bcf if Year==1998
bysort id: egen temp1b=max(temp1)
gen temp2=sales_bcf if Year==2013
bysort id: egen temp2b=max(temp2)
replace shortdsales1=temp2b-temp1b
drop temp*

gen temp1=MainsLengthofPipe if Year==1998
bysort id: egen temp1b=max(temp1)
gen temp2=MainsLengthofPipe if Year==2013
bysort id: egen temp2b=max(temp2)
replace shortdMainsLengthofPipe=temp2b-temp1b
drop temp*

gen temp1=Total_End_User_tot_customers if Year==1998
bysort id: egen temp1b=max(temp1)
gen temp2=Total_End_User_tot_customers if Year==2013
bysort id: egen temp2b=max(temp2)
replace shortdTotal_End_User=temp2b-temp1b
drop temp*

gen temp=MainsLengthofPipe  if Year>=1998
bysort id: egen scale2=mean(temp)

foreach v in shortacccapital replacement historic_bad1998 ///
	shortdsales1 shortdMainsLengthofPipe shortdTotal_End_User {
	gen alt2`v'=`v'/scale2  
	quietly sum alt2`v' if Year==2013, detail //only trim outliers for the 2013 observations that we actually use
	replace alt2`v'=r(p99) if alt2`v'>r(p99) & alt2`v'!=. & Year==2013
	replace alt2`v'=r(p1) if alt2`v'<r(p1) & alt2`v'!=. & Year==2013
}

gen tempreplacement=alt1replacement
replace alt1replacement=alt2replacement /* need to have the coefficient we want printed in the table to have the same name in all specs*/ 

ivreg2 alt2shortacccapital (alt1replacement = alt2historic_bad1998) ///
	alt2shortdsales1 alt2shortdMainsLengthofPipe alt2shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=., first robust

replace alt1replacement=tempreplacement
drop tempreplacement	 
 
	 
	
///////////////////////////////////////
//APPENDIX: HETEROGENEITY OF REPLACEMENT COST BY DENSITY
///////////////////////////////////////		
use $SNL/ldc_data12, clear

gen temp=mainsbad if Year==2004
	bysort id: egen historic_bad=max(temp)
	drop temp
	
gen temp1=mainsbad if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=mainsbad if Year==2013
bysort id: egen temp2b=max(temp2)
gen replacement=temp1b-temp2b
drop temp*

gen temp1=capital_real if Year>=2004
bysort id: egen temp2=mean(temp) //averaging not summing in case missing years
gen shortacccapital=temp2*(2013-2004)
drop temp*

gen temp1=total_real if Year>=2004
bysort id: egen temp2=mean(temp)
gen shortacctotal=temp2*(2013-2004)
drop temp*

gen temp1=sales_bcf if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=sales_bcf if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdsales1=temp2b-temp1b
drop temp*

gen temp1=MainsLengthofPipe if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=MainsLengthofPipe if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdMainsLengthofPipe=temp2b-temp1b
drop temp*

gen temp1=Total_End_User_tot_customers if Year==2004
bysort id: egen temp1b=max(temp1)
gen temp2=Total_End_User_tot_customers if Year==2013
bysort id: egen temp2b=max(temp2)
gen shortdTotal_End_User=temp2b-temp1b
drop temp*


keep if capital_real!=. & sales!=. & mainsbad!=.
drop if strmatch(Company,"Pacific*")

gen temp=MainsLengthofPipe  if Year>=2004
bysort id: egen scale=mean(temp)
drop temp

foreach v in shortacccapital replacement historic_bad shortacctotal ///
	shortdsales1 shortdMainsLengthofPipe shortdTotal_End_User {
	gen alt1`v'=`v'/scale  
	quietly sum alt1`v' if Year==2013, detail //only trim outliers for the 2013 observations that we actually use
	replace alt1`v'=r(p99) if alt1`v'>r(p99) & alt1`v'!=. & Year==2013
	replace alt1`v'=r(p1) if alt1`v'<r(p1) & alt1`v'!=. & Year==2013
}
			 
reg alt1shortacccapital alt1replacement  ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=.,  robust
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=. , first robust
	
gen density=NumSerTotal/MainsLengthofPipe
bysort id: egen temp=mean(density) if Year>=2004
	xtile qdensity=temp, nq(4)
	drop temp
	table qdensity, contents(mean density)

forval v=1(1)4{
ivreg2 alt1shortacccapital (alt1replacement = alt1historic_bad) ///
	alt1shortdsales1 alt1shortdMainsLengthofPipe alt1shortdTotal_End_User i.region ///
	if Year==2013 & shortacctotal!=. & qdensity==`v', first robust
}	 


///////////////////////////////////////		
//WHAT DRIVES DECISIONS TO SPEND MONEY ON ABATEMENT? (REDUCED FORM)
///////////////////////////////////////		
use $SNL/ldc_data12, clear

gen temptotal_real=total_real*1000 //dollars
gen tempcapital_real=capital_real*1000 //dollars
gen tempdist_om_real=dist_om_real*1000 //dollars

keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")
	
bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004
		
foreach v in temptotal_real tempdist_om_real tempcapital_real ///
	MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales vol_lauf  {
	gen `v'_s=`v'/(scale) if Year>=2004 
	sum `v'_s , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=. 
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	
	
areg temptotal_real_s price_city_real proportionratecase proportiontestyear ///
	pipeline_rider lauf_phmsa_s  ///
	MainsLengthofPipe_s SerTotalNumberof_s sales_s RY* if Year>=2004 , ///
	absorb(id) cluster(id)
areg tempdist_om_real_s price_city_real proportionratecase proportiontestyear ///
	pipeline_rider lauf_phmsa_s  ///
	MainsLengthofPipe_s SerTotalNumberof_s sales_s RY* if Year>=2004 , ///
	absorb(id) cluster(id)
areg tempcapital_real_s price_city_real proportionratecase proportiontestyear ///
	pipeline_rider lauf_phmsa_s  ///
	MainsLengthofPipe_s SerTotalNumberof_s sales_s RY* if Year>=2004 , ///
	absorb(id) cluster(id)

     
	  
//robustness, for appendix
areg temptotal_real_s lauf_phmsa_s price_city_real proportionratecase proportiontestyear ///
	pipeline_rider    ///
	MainsLengthofPipe_s SerTotalNumberof_s sales_s YY*  if Year>=2004 , ///
	absorb(id) cluster(id)
areg temptotal_real_s lauf_phmsa_s price_city_real proportionratecase proportiontestyear ///
	pipeline_rider   ///
	MainsLengthofPipe_s SerTotalNumberof_s sales_s RY* if sample_bundled==1 & Year>=2004 , ///
	absorb(id) cluster(id)
//replicates the reduced form that underlies the 2SLS results from Table 3.
areg tempdist_om_real_s lauf_phmsa_s  ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 & vol_lauf_s!=. , absorb(id) cluster(id) 



///////////////////////////////////////
//$/MCF, WHEN REPLACING PIPES
///////////////////////////////////////		

//reduced om, simple average -- OM offset, dollars per mile, from analysis group
disp 1/6*(2077+4557+2518+1645+3537+3959) //3049

/* FIRST COLUMN */ 
local alpha = 1221890  
local beta =  470
local r = 0.03
local years = 39
local offset = -3049

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'
	
/* SECOND COLUMN */ 	
local alpha = 607170 
local beta =  470
local r = 0.03
local years = 39
local offset = -3049

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'
	
/* THIRD COLUMN */ 	
local alpha = 1221890  
local beta =  229
local r = 0.03
local years = 39
local offset = -3049

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'

/* FOURTH COLUMN  */ 	
local alpha = 1221890 
local beta =  470
local r = 0.07
local years = 39
local offset = -3049

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'

/* FIFTH COLUMN  */ 	
local alpha = 1221890 
local beta =  470
local r = 0.03
local years = 59
local offset = -3049

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'

/* SIXTH COLUMN  */ 	
local alpha = 1221890 
local beta =  470
local r = 0.03
local years = 39
local offset = -0

local denominator = `beta'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local denominator = `denominator' + `beta'/((1+`r')^`n')
}	
}
local numerator = `alpha'/((1+`r')^0)
forval n=1(1)60{
	if `years'>=`n'{
		local numerator = `numerator' + `offset'/((1+`r')^`n')
}	
}

display "levelized cost, $/mcf"
display `numerator'/`denominator'
		

///////////////////////////////////////
//COVARIATE BALANCE
///////////////////////////////////////		
use $SNL/ldc_data12, clear	
gen tempcapital_real=capital_real*1000
label var tempcapital_real  "Capital expenses, \\$"  
gen tempdist_om_real=dist_om_real*1000
label var tempdist_om_real  "O\&M expenses, \\$"  

keep if dist_om_real!=. & sales!=.
drop if strmatch(Company,"Pacific*")	

bysort id: egen scale=mean(MainsLengthofPipe) if Year>=2004
  
foreach v in tempdist_om_real vol_lauf MainsLengthofPipe mainsbad SerTotalNumberof servsbad sales ///
	vol_lauf_trim1{
	gen `v'_s=`v'/scale if Year>=2004
	quietly sum `v'_s  , detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=.
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=.
}	

gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	

xtivreg2 tempdist_om_real_s (vol_lauf_s = lauf_phmsa_s ) ///
	mainsbad_s MainsLengthofPipe_s SerTotalNumberof_s servsbad_s sales_s  RY*  ///
	if Year>=2004 , fe i(id) cluster(id) partial(RY*) first
	 
keep if e(sample)	

sum historic_lauf_s, detail
gen q=1 if historic_lauf_s<r(p25)
	replace q=2 if historic_lauf_s>=r(p25) & historic_lauf_s<r(p50)
	replace q=3 if historic_lauf_s>=r(p50) & historic_lauf_s<r(p75)
	replace q=4 if historic_lauf_s>=r(p75)
tab q if Year<2010

gen q2=(q==2)
gen q3=(q==3)
gen q4=(q==4)

//TABLE
foreach v in MainsLengthofPipe sales_s SerTotalNumberof_s mainsbad_s servsbad_s {
	reg `v' q2 q3 q4 if Year>=2004 & Year<2010, cluster(id)
	estimates store b`v'
		lincom q4-q2
		lincom q3-q2
		lincom q4-q3
}
	
//HISTOGRAMS
gen qsample=q if Year>=2004 & Year<2010 & tempdist_om_real_s!=. //so histogram is pre-treatment only

tw (kdensity MainsLengthofPipe if qs==1, lcolor(gs0)) ///
	(kdensity MainsLengthofPipe if qs==2, lcolor(gs4)) ///
	(kdensity MainsLengthofPipe if qs==3, lcolor(gs8)) ///
	(kdensity MainsLengthofPipe if qs==4, lcolor(gs12) ///
	legend(label(1 "Q1") label(2 "Q2") label( 3 "Q3") label(4 "Q4")) ///
	xtitle("Mains, unscaled") ytitle("") graphregion(color(white)) )
tw (kdensity sales_s if qs==1, lcolor(gs0)) ///
	(kdensity sales_s if qs==2, lcolor(gs4)) ///
	(kdensity sales_s if qs==3, lcolor(gs8)) ///
	(kdensity sales_s if qs==4, lcolor(gs12) ///
	legend(label(1 "Q1") label(2 "Q2") label( 3 "Q3") label(4 "Q4")) ///
	xtitle("Volume sold") ytitle("") graphregion(color(white)) ) 
tw (kdensity SerTotalNumberof_s if qs==1, lcolor(gs0)) ///
	(kdensity SerTotalNumberof_s if qs==2, lcolor(gs4)) ///
	(kdensity SerTotalNumberof_s if qs==3, lcolor(gs8)) ///
	(kdensity SerTotalNumberof_s if qs==4, lcolor(gs12) ///
	legend(label(1 "Q1") label(2 "Q2") label( 3 "Q3") label(4 "Q4")) ///
	xtitle("Service lines") ytitle("") graphregion(color(white)) )
tw (kdensity mainsbad_s if qs==1, lcolor(gs0)) ///
	(kdensity mainsbad_s if qs==2, lcolor(gs4)) ///
	(kdensity mainsbad_s if qs==3, lcolor(gs8)) ///
	(kdensity mainsbad_s if qs==4, lcolor(gs12) ///
	legend(label(1 "Q1") label(2 "Q2") label( 3 "Q3") label(4 "Q4")) ///
	xtitle("Low-quality mains") ytitle("") graphregion(color(white)) )
tw (kdensity servsbad_s if qs==1, lcolor(gs0)) ///
	(kdensity servsbad_s if qs==2, lcolor(gs4)) ///
	(kdensity servsbad_s if qs==3, lcolor(gs8)) ///
	(kdensity servsbad_s if qs==4, lcolor(gs12) ///
	legend(label(1 "Q1") label(2 "Q2") label( 3 "Q3") label(4 "Q4")) ///
	xtitle("Low-quality service lines") ytitle("") graphregion(color(white)) ) 



///////////////////////////////////////
//APPENDIX: LEAKS ABATED PER MILE REPLACED
///////////////////////////////////////
use $SNL/ldc_data12, clear
	
keep if Year>=2004

keep if sales!=.
drop if strmatch(Company,"Pacific*")

bysort id: egen scale=mean(MainsLengthofPipe)
xtset id Year 
foreach v in vol_lauf mainsrank1 mainsrank2 sales MainsLengthofPipe {
	gen `v'_s=`v'/scale
	quietly sum `v'_s, detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=. 
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=. 
}

sort id Year
foreach v in mainsrank1_s mainsrank2_s sales_s MainsLengthofPipe_s {
	by id: gen l`v'=`v'[_n-1]
}

//REGS 
reg vol_lauf_s lmainsrank1_s lmainsrank2_s lsales_s lMainsLengthofPipe_s RY* 

reg vol_lauf_s lmainsrank1_s lmainsrank2_s lsales_s lMainsLengthofPipe_s RY* ///
	if dist_om_real!=. 

 
///////////////////////////////////////		
//SAFETY REGRESSIONS FOR APPENDIX
///////////////////////////////////////		
use $SNL/ldc_data12, clear

merge 1:m Subfiler Year using  "$otherdata\PHMSA\AccidentswSNLkey.dta"
	bysort Subfiler_Key: egen sampleAccident=max(totincident) 
	gen hadaccident=(sampleAccident>0 & sampleAccident!=.)
	drop sampleAccident
	bysort Subfiler_Key: egen firstyearaccident=min(firstyear) 
	drop firstyear
	drop if _m==2
	
foreach var in totfatal totinjure totprpty totincident {
replace `var'=0 if `var'==. & hadaccident==1
}
gen totprptyreal=totprpty/cpi*$cpi2015
gen totdamage=totprptyreal+totfatal*9.1*10^6+totinjur*1*10^6
gen totdamagealt=totprptyreal+(totfatal*9.1*10^6)

gen neworleanskatrina=(Subf==1049 & Year==2005)
drop if neworleanskatrina==1

sum totincident if hadaccident==1 //1 every 3.5 years (remembering many of these are excavation related)

drop if strmatch(Company,"Pacific Gas*")

keep if Year>=2004	
	
keep if sales!=.

bysort id: egen scale=mean(MainsLengthofPipe)
 
foreach v in totdamage totdamagealt vol_lauf mainsbad MainsLengthofPipe sales_bcf totincident{
	gen `v'_s=`v'/scale
	quietly sum `v'_s, detail
	replace `v'_s=r(p99) if `v'_s>r(p99) & `v'_s!=. 
	replace `v'_s=r(p1) if `v'_s<r(p1) & `v'_s!=. 
}

gen temp=mainsbad_s if Year==2004
	bysort id: egen historic_bad_s=max(temp)
	drop temp
	gen bad_phmsa_s=historic_bad_s*phmsa
gen temp1=vol_lauf_s if Year<=2009
	bysort id: egen historic_lauf_s=mean(temp1)
	drop temp1 
	gen lauf_phmsa_s=historic_lauf_s*phmsa	
	
reg totdamage_s vol_lauf_s mainsbad_s ///
	MainsLengthofPipe_s sales_bcf_s RY* if hadacc==1, cluster(id) 
	lincom mainsbad_s/229
	lincom mainsbad_s/470
	lincom vol_lauf_s + mainsbad_s/229
areg totdamage_s vol_lauf_s mainsbad_s ///
	MainsLengthofPipe_s sales_bcf_s RY* if hadacc==1, absorb(id) cluster(id)
	lincom mainsbad_s/229
	lincom mainsbad_s/470
	lincom vol_lauf_s + mainsbad_s/229
xtivreg2 totdamage_s (vol_lauf_s mainsbad_s = bad_phmsa_s lauf_phmsa_s) ///
	MainsLengthofPipe_s sales_bcf_s RY* if hadacc==1, fe i(id) cluster(id) partial(RY*)
	lincom mainsbad_s/229
	lincom mainsbad_s/470
	lincom vol_lauf_s + mainsbad_s/229 	
